#imports
import pip
import numpy as np
import pandas as pd
import seaborn as sns
import matplotlib.pyplot as plt
import plotly.express as px
sns.set()
import plotly.offline as pyo
import plotly.graph_objs as go
pyo.init_notebook_mode()
# Loading in the hierarcy
url = "https://raw.githubusercontent.com/bcaffo/MRIcloudT1volumetrics/master/inst/extdata/multilevel_lookup_table.txt"
multilevel_lookup = pd.read_csv(url, sep = "\t").drop(['Level5'], axis = 1)
multilevel_lookup = multilevel_lookup.rename(columns = {
"modify" : "roi",
"modify.1" : "level4",
"modify.2" : "level3",
"modify.3" : "level2",
"modify.4" : "level1"})
multilevel_lookup = multilevel_lookup[['roi', 'level3', 'level2', 'level1']]
# Loading in kirby21 all levels
df_all_levels = pd.read_csv("https://raw.githubusercontent.com/smart-stats/ds4bio_book/main/book/assetts/kirby21AllLevels.csv")
df_all_levels = df_all_levels.drop(['Unnamed: 0', 'min', 'max', 'mean', 'std'],\
axis = 1)
df_all_levels.head(4)
# Subject 127
id = 127
subjectData = df_all_levels.loc[(df_all_levels.type == 1) & (df_all_levels.level == 5) & (df_all_levels.id == id)]
subjectData = subjectData[['roi', 'volume']]
## Merge the subject data with the multilevel data
subjectData = pd.merge(subjectData, multilevel_lookup, on = "roi")
subjectData = subjectData.assign(icv = "ICV")
subjectData = subjectData.assign(comp = subjectData.volume / np.sum(subjectData.volume))
subjectData.head()
fig = px.sunburst(subjectData, path=['icv', 'level1', 'level2', 'level3', 'roi'],
values='comp', width=800, height=800)
fig.show()
# imports
import sqlite3 as sq3
import pandas as pd
con = sq3.connect("opioid.db")
sql = con.cursor()
# read in datasets
county_annual = pd.read_sql_query("SELECT * from annual", con)
county_pop = pd.read_sql_query("SELECT * from population", con)
county_land = pd.read_sql_query("SELECT * from land", con)
# close the connection
con.close
<function Connection.close>
#checking annual missingness
county_annual = county_annual.drop(['?'],\
axis = 1)
county_annual.replace("NA", np.nan, inplace=True)
county_annual[county_annual.isnull().any(axis=1)]
#checking population
county_pop = county_pop.drop(['?'],\
axis = 1)
county_pop.head(4)
county_pop.replace("NA", np.nan, inplace=True)
county_pop[county_pop.isnull().any(axis=1)]
#checking land
county_land = county_land.drop(['?'],\
axis = 1)
county_land.head(4)
county_land.replace("NA", np.nan, inplace=True)
county_land[county_land.isnull().any(axis=1)]
# exlcuding rows with missing values for annual
county_annual = county_annual.dropna()
county_annual[county_annual.isnull().any(axis=1)]
# exlcuding rows with missing values for population
county_pop = county_pop.dropna()
county_pop[county_pop.isnull().any(axis=1)]
# grab just three columns from the land table, so let’s create a new one called land_area
land_area = county_land[["Areaname", "STCOU", "LND110210D"]]
# STCOU rename to coutyfips
land_area.rename(columns={'STCOU': 'countyfips'}, inplace=True)
#check to make sure it was done
land_area.head(4)
# join the tables: left join the tables land_area and population
county_info = pd.merge(county_pop,land_area, on='countyfips', how='left')
county_info.head(10)
Annual = pd.merge(county_annual,county_info, how='left')
Annual.head(10)
Annual["count"] = pd.to_numeric(Annual["count"])
# average number of pills across year
df_opioid = Annual.groupby('year')['count'].mean()
df_opioid.head(3)
df_mean = Annual.drop(["BUYER_COUNTY", "BUYER_STATE", "DOSAGE_UNIT", "countyfips", "STATE", "COUNTY", "county_name", "NAME", "variable", "population", "Areaname", "LND110210D"], axis = 1).groupby(["year"]).mean().reset_index()
fig = px.scatter(df_mean, x = "year", y = "count")
fig.show()
/var/folders/qw/mgyk2j7x2y54vlp8m3xs47qw0000gn/T/ipykernel_45746/2831751930.py:36: SettingWithCopyWarning: A value is trying to be set on a copy of a slice from a DataFrame See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy